library(readxl)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.6 ✓ dplyr 1.0.8
## ✓ tidyr 1.2.0 ✓ stringr 1.4.0
## ✓ readr 2.1.2 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(TSstudio)
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
Gold <- read_xlsx("5010DATA.xlsx", sheet = "Gold")
Gold$Date <- as.Date(Gold$Date, format = "%Y-%m-%d")
#class(Gold$Date)
ts_plot(Gold[-nrow(Gold),-2], slider = TRUE,
title = "Monthly PricePercentage Change Gold Price From 2003-2022",
Xtitle = "Percentage Change", Ytitle = "Price(USD)")
ts_plot(Gold[,-3], slider = TRUE,
title = "Monthly Price Change Gold Price From 2003-2022",
Xtitle = "Time", Ytitle = "Price(USD)")
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(survMisc)
## Loading required package: survival
##
## Attaching package: 'survMisc'
## The following object is masked from 'package:forecast':
##
## autoplot
## The following object is masked from 'package:ggplot2':
##
## autoplot
library(zoo)
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
price <- ts(Gold$`Last Price`, frequency = 12, start = c(2003, 1), end = c(2022, 4))
price
## Jan Feb Mar Apr May Jun Jul Aug Sep
## 2003 368.15 349.95 337.45 338.55 364.45 346.40 354.35 375.55 385.35
## 2004 402.45 396.15 426.45 386.75 395.55 394.25 391.05 409.85 418.25
## 2005 422.60 435.65 428.35 434.40 417.25 435.50 429.80 435.20 469.30
## 2006 568.90 561.55 583.65 654.43 645.20 615.85 636.75 627.30 598.30
## 2007 653.20 669.35 663.80 678.45 660.50 649.65 664.30 673.30 743.60
## 2008 926.10 973.90 916.90 877.55 886.50 925.40 914.10 831.15 870.95
## 2009 927.85 942.32 919.35 888.20 979.18 926.50 954.00 951.19 1007.70
## 2010 1081.20 1117.59 1113.25 1179.03 1216.30 1242.38 1181.00 1247.40 1308.54
## 2011 1332.68 1411.33 1432.20 1563.70 1535.73 1500.18 1627.05 1825.55 1623.79
## 2012 1737.76 1696.76 1668.15 1664.75 1560.51 1597.45 1614.58 1691.85 1772.25
## 2013 1663.70 1579.61 1597.50 1476.71 1387.80 1234.53 1325.07 1395.27 1329.03
## 2014 1244.55 1326.39 1284.01 1291.60 1249.68 1327.33 1282.59 1287.32 1208.15
## 2015 1283.79 1213.18 1183.57 1184.37 1190.58 1172.35 1095.80 1134.93 1115.09
## 2016 1118.21 1238.67 1232.75 1293.53 1215.32 1321.90 1351.28 1308.97 1315.87
## 2017 1210.72 1248.44 1249.20 1268.28 1268.92 1241.61 1269.44 1321.43 1279.75
## 2018 1345.14 1318.31 1325.48 1315.39 1298.51 1252.60 1224.15 1201.15 1190.88
## 2019 1321.25 1313.32 1292.38 1283.53 1305.58 1409.55 1413.78 1520.38 1472.49
## 2020 1589.16 1585.69 1577.18 1686.50 1730.27 1780.96 1975.86 1967.80 1885.82
## 2021 1847.65 1734.04 1707.71 1769.13 1906.87 1770.11 1814.19 1813.62 1756.95
## 2022 1797.17 1908.99 1937.44 1980.11
## Oct Nov Dec
## 2003 384.25 398.15 415.45
## 2004 428.55 450.95 438.45
## 2005 465.19 493.08 517.00
## 2006 606.60 648.00 636.70
## 2007 796.80 783.55 833.70
## 2008 723.85 818.05 882.05
## 2009 1045.45 1179.63 1096.98
## 2010 1359.40 1386.23 1421.40
## 2011 1714.70 1746.35 1564.91
## 2012 1720.65 1714.98 1675.35
## 2013 1323.06 1253.35 1201.64
## 2014 1172.94 1167.38 1184.37
## 2015 1142.11 1064.77 1061.10
## 2016 1277.21 1173.20 1147.50
## 2017 1271.45 1275.01 1302.80
## 2018 1214.76 1220.52 1282.49
## 2019 1512.99 1463.98 1517.27
## 2020 1878.81 1776.95 1898.36
## 2021 1783.38 1774.52 1829.20
## 2022
percent <- ts(Gold[-nrow(Gold),3], frequency = 12, start = c(2003, 1), end = c(2022, 4))
percent
## Jan Feb Mar Apr May
## 2003 -0.0494363710 -0.0357193885 0.0032597422 0.0765027322 -0.0495266840
## 2004 -0.0156541185 0.0764861795 -0.0930941494 0.0227537169 -0.0032865630
## 2005 0.0308802650 -0.0167565706 0.0141239640 -0.0394797422 0.0437387657
## 2006 -0.0129196695 0.0393553557 0.1212713099 -0.0141038767 -0.0454897706
## 2007 0.0247244336 -0.0082916262 0.0220699006 -0.0264573661 -0.0164269493
## 2008 0.0516142965 -0.0585275696 -0.0429163486 0.0101988491 0.0438804287
## 2009 0.0155951932 -0.0243760082 -0.0338826345 0.1024318847 -0.0538001185
## 2010 0.0336570477 -0.0038833562 0.0590882551 0.0316107309 0.0214420784
## 2011 0.0590164180 0.0147874700 0.0918167854 -0.0178870627 -0.0231486003
## 2012 -0.0235935918 -0.0168615479 -0.0020381860 -0.0626160084 0.0236717483
## 2013 -0.0505439683 0.0113255804 -0.0756118936 -0.0602081654 -0.1104409857
## 2014 0.0657587080 -0.0319513868 0.0059111689 -0.0324558687 0.0621359068
## 2015 -0.0550012074 -0.0244069305 0.0006759212 0.0052432939 -0.0153118648
## 2016 0.1077257402 -0.0047793198 0.0493044007 -0.0604624555 0.0876970674
## 2017 0.0311550152 0.0006087597 0.0152737752 0.0005046204 -0.0215222394
## 2018 -0.0199458792 0.0054387815 -0.0076123367 -0.0128326960 -0.0353559079
## 2019 -0.0060018921 -0.0159443243 -0.0068478311 0.0171791855 0.0796351047
## 2020 -0.0021835435 -0.0053667489 0.0693135850 0.0259531574 0.0292960058
## 2021 -0.0614889184 -0.0151841941 0.0359662940 0.0778574780 -0.0717196243
## 2022 0.0622200460 0.0149031687 0.0220239078 -0.0494363710
## Jun Jul Aug Sep Oct
## 2003 0.0229503464 0.0598278538 0.0260950606 -0.0028545478 0.0361743656
## 2004 -0.0081166772 0.0480756936 0.0204953032 0.0246264196 0.0522692801
## 2005 -0.0130884041 0.0125639832 0.0783547794 -0.0087577243 0.0599539973
## 2006 0.0339368353 -0.0148409894 -0.0462298741 0.0138726391 0.0682492582
## 2007 0.0225506042 0.0135480957 0.1044111095 0.0715438408 -0.0166290161
## 2008 -0.0122109358 -0.0907449951 0.0478854599 -0.1688960331 0.1301374594
## 2009 0.0296815974 -0.0029454927 0.0594097919 0.0374615461 0.1283466450
## 2010 -0.0494051739 0.0562235394 0.0490139490 0.0388677457 0.0197366485
## 2011 0.0845698516 0.1219999385 -0.1105201172 0.0559863036 0.0184580393
## 2012 0.0107233403 0.0478576472 0.0475219434 -0.0291155311 -0.0032952663
## 2013 0.0733396515 0.0529783332 -0.0474746823 -0.0044919979 -0.0526884646
## 2014 -0.0337067647 0.0036878504 -0.0614998602 -0.0291437322 -0.0047402254
## 2015 -0.0652961999 0.0357090710 -0.0174812544 0.0242312280 -0.0677167698
## 2016 0.0222255844 -0.0313110532 0.0052713202 -0.0293798020 -0.0814353160
## 2017 0.0224144458 0.0409550668 -0.0315415875 -0.0064856417 0.0027999528
## 2018 -0.0227127575 -0.0187885472 -0.0085501394 0.0200523982 0.0047416774
## 2019 0.0030009578 0.0754006988 -0.0314987043 0.0275044313 -0.0323928116
## 2020 0.1094353607 -0.0040792364 -0.0416607379 -0.0037172159 -0.0542151681
## 2021 0.0249024072 -0.0003141898 -0.0312468985 0.0150431145 -0.0049680943
## 2022
## Nov Dec
## 2003 0.0434509607 -0.0312913708
## 2004 -0.0277192593 -0.0361500741
## 2005 0.0485113977 0.1003868472
## 2006 -0.0174382716 0.0259148736
## 2007 0.0640035735 0.1108312343
## 2008 0.0782348267 0.0519244941
## 2009 -0.0700643422 -0.0143849478
## 2010 0.0253709702 -0.0624173350
## 2011 -0.1038966988 0.1104536363
## 2012 -0.0231081412 -0.0069537709
## 2013 -0.0412574301 0.0357095303
## 2014 0.0145539584 0.0839433623
## 2015 -0.0034467538 0.0538215060
## 2016 -0.0219058984 0.0550936819
## 2017 0.0217959075 0.0324992324
## 2018 0.0507734408 0.0302224579
## 2019 0.0364007705 0.0473811517
## 2020 0.0683249388 -0.0267125308
## 2021 0.0308139666 -0.0175103871
## 2022
components.p <- decompose(percent)
plot(components.p)
# bc <- boxcox(price ~ 1)
# lam <- bc$x[which.max(bc$y)]
# lam
# ts_plot(BoxCox(price, lam))
Notice what happens when lambda equals 1. In that case, our data shifts down but the shape of the data does not change. Therefore, if the optimal value for lambda is 1, then the data is already normally distributed, and the Box-Cox transformation is unnecessary.
library(ggplot2)
library(forecast)
library(tseries)
fit <- auto.arima(price)
checkresiduals(fit)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,1) with drift
## Q* = 30.072, df = 22, p-value = 0.1167
##
## Model df: 2. Total lags used: 24
plot(forecast(fit, h = 60))
fitp <- auto.arima(percent)
predict(fitp, n.ahead = 12)
## $pred
## Jan Feb Mar Apr May Jun
## 2022 0.015207273 0.007463333
## 2023 0.008291771 0.008291771 0.008291771 0.008291771
## Jul Aug Sep Oct Nov Dec
## 2022 0.008391013 0.008279882 0.008293195 0.008291600 0.008291791 0.008291768
## 2023
##
## $se
## Jan Feb Mar Apr May Jun
## 2022 0.04854813 0.04889524
## 2023 0.04890028 0.04890028 0.04890028 0.04890028
## Jul Aug Sep Oct Nov Dec
## 2022 0.04890021 0.04890028 0.04890028 0.04890028 0.04890028 0.04890028
## 2023
checkresiduals(fitp)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,0,0) with non-zero mean
## Q* = 28.908, df = 22, p-value = 0.1475
##
## Model df: 2. Total lags used: 24
adf.test(percent) ###station
## Warning in adf.test(percent): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: percent
## Dickey-Fuller = -5.3908, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
plot(forecast(fitp, h = 30),include = 20)
percent %>% stl(s.window='periodic') %>% seasadj() -> ggadj
plot(forecast(auto.arima(ggadj), h=30), include = 20)
abline(h=0, col = "dark red")
The ACF plot of the residuals from the ARIMA(0,1,1) model shows that all
autocorrelations are within the threshold limits, indicating that the
residuals are behaving like white noise. A portmanteau test returns a
large p-value, also suggesting that the residuals are white noise.
Residuals are almost normally distributed. The autocorrelation plot
shows that for the first 12 lags, all sample autocorrelations except
those at lags 6 and 11 fall inside the 95 % confidence bounds indicating
the residuals appear to be random and we do not need any
transformation.
acf(Gold$`Last Price`)
acf(Gold$Percent) ##p=1
pacf(Gold$Percent) ##q=0
###No need to diff
##Agree with auto.arima
lambdap <- BoxCox.lambda(percent)
## Warning in guerrero(x, lower, upper): Guerrero's method for selecting a Box-Cox
## parameter (lambda) is given for strictly positive data.
ndiffs(BoxCox(percent, lambdap)) ###d=0
## [1] 0
lambda <- BoxCox.lambda(price)
ndiffs(BoxCox(price,lambda)) ###d=1
## [1] 1
diff <- diff(Gold$`Last Price`)
ts.plot(diff)
acf(diff) ###p=5
pacf(diff) ###q=5